#!/usr/bin/perl -w -I/usr/local/devprac/lib/pm

use strict;

use Getopt::Long;
use util::args;

my(@keys,@sums,@cnts);
my($k,$s,$c,$d,$h,$r,$preserve_header);
$h=0;
$r=0;			# sort order flag - if set, use original order
$preserve_header = 0;	# by default, include all lines in aggregation
$d = chr(0xfe);

&GetOptions(	"k=s"	=> \$k,
		"s=s"	=> \$s,
		"c=s"	=> \$c,
		"d=s"	=> \$d,
		"r!"	=> \$r,
		"p!"	=> \$preserve_header,
		"h!"	=> \$h
	);

if( $h || ! defined($k) ){
	print STDERR <<__USAGE__;

flat-file aggregator

usage: $0 <opts> [file(s)]
opts:	-h			- print this message and exit
	-k <key-cells>		- cells for aggregation levels & output (req'd)
	-s <sum-value-cells>	- sum numeric values, skip non-numerics
	-c <count-value-cells>	- count non-null
	-d <delimiter>		- specifies input data delimiter (0xfe)
	-r			- don't sort the output by keys, preserve order
	-p			- preserve header - don't include first line
					in the aggregation

all column indexes are 1-based.  if neither sum or count columns are specified,
only key columns will be printed.  a side-effect of this is that you can cut
and reorder columns from input - the cut(1) utility always uses the original
order.

__USAGE__
	exit(1);
}

@keys = util::args::expand($k);
@sums = util::args::expand($s);
@cnts = util::args::expand($c);

# handle special case delimiters
$d = chr(0x09) if($d eq '\t');
my $sd = $d;			# delimiter to use for splitting
$sd = '\|' if($sd eq '|');

my %h = ();

if ( $preserve_header ) {
	my $hline = <>;
	$hline =~ s/[\r\n]//g;		# get rid of line breaks
	my $outheader = "";
	my @lp = split(/$sd/, $hline);
	foreach my $ki(@keys){
		if ( ! defined($lp[$ki - 1]) ) { $lp[$ki - 1] = ''; }
		$outheader .= $lp[$ki - 1] . $d;
	}
	$outheader =~ s/$sd$//;	# remove trailing delimiter
	if(defined($s) && length($s)){
		for my $si( 0 .. $#sums ){
			$outheader .= $d . $lp[$sums[$si] - 1];
		}
	}
	if(defined($c) && length($c)){
		for my $ci( 0 .. $#cnts ){
			if ( ! defined($lp[$cnts[$ci] - 1]) ) {
				$lp[$cnts[$ci] - 1] = '';
			}
			$outheader .= $d . $lp[$cnts[$ci] - 1];
		}
	}
	print $outheader, qq(\n);
}

while(<>){
	s/[\r\n]//g;		# get rid of newlines
	my $hk = "";		# hash key
	my @lp = split/$sd/;
	foreach my $ki(@keys){
		if ( ! defined($lp[$ki - 1]) ) { $lp[$ki - 1] = ''; }
		$hk .= $lp[$ki - 1] . $d;
	}
	$hk =~ s/$sd$//;	# remove trailing delimiter
	if(defined($s) && length($s)){
		for my $si( 0 .. $#sums ){
			next if( ! $lp[$sums[$si] - 1] ||
				$lp[$sums[$si] - 1] !~ /^\d*\.{0,1}\d*$/);
			$h{$hk}[0][$si] += $lp[$sums[$si] - 1];
		}
	}
	if(defined($c) && length($c)){
		for my $ci( 0 .. $#cnts ){
			if( defined($lp[$cnts[$ci] - 1]) &&
			    length($lp[$cnts[$ci] - 1]) ){
				$h{$hk}[1][$ci] += 1;
			}
		}
	}
	# make sure it gets defined if no aggregation, and have the line
	# number so the original order can be preserved if necessary
	$h{$hk}[2] = $.;
}

my @hashkeys;
if($r){
	@hashkeys = sort {oohashsort(\%h)} keys(%h);
} else {
	# a sort of the entire key as a single string will
	# result in screwy sort order because the delimiter
	# could make ("a$delim" > "ab$delim") true.
	@hashkeys = sort {delimited_field_sort($d)} keys %h;
}
foreach $k (@hashkeys){
	print $k;

# sum arrays will not be defined if the file's values were non-numeric
	if(defined($s) && defined($h{$k}[0])){
		for my $si ( 0 .. $#sums ) {
			${$h{$k}}[0][$si] = '0' unless (${$h{$k}}[0][$si]);
		}
		print $d, join($d, @{ $h{$k}[0] } );
	}

	if(defined($c) && defined($h{$k}[1])){
		for my $si ( 0 .. $#cnts) {
			${$h{$k}}[1][$si] = '0' unless (${$h{$k}}[1][$si]);
		}
		print $d, join($d, @{ $h{$k}[1] } );
	}
	print qq(\n);
}

exit(0);

# original order hash sort - hashref is param
sub oohashsort {
	my $hash = shift;
	return $hash->{$a}[2] cmp $hash->{$b}[2];
}

sub delimited_field_sort {
	my $delim = shift;
	my @aa = split(/\Q$delim\E/, $a);
	my @ba = split(/\Q$delim\E/, $b);

	my $retval;
	my $aa_sz = scalar(@aa);
	my $ba_sz = scalar(@ba);
	my $nfields = $aa_sz > $ba_sz ? $aa_sz : $ba_sz;

	for ( my $i = 0; $i < $nfields; $i++ ) {
		$retval = ($aa[$i] || '') cmp ($ba[$i] || '');
		if ( $retval != 0 ) {
			last;
		}
	}

	if ( $retval ) { return $retval }

	if ( $aa_sz > $ba_sz ) { return 1; }
	return -1;
}

